Gapminder Analysis

In this analysis we look at how life expectancy relates to a country’s gross domestic product (GDP) per capita, and at how life expectancy has changed over time. GDP is the standard measure of the value added created through the production of goods and services in a country during a certain period. As such, it also measures the income earned from that production, or the total amount spent on final goods and services (less imports)[1].

(If you have time, try creating new plots from other gapminder data!)

Analysis Plan

  1. Import data
  2. Analyze data
  3. Plot data
  4. Draw amazing conclusions!

Import libraries and data

# Plotting package used for the figures in this lesson
library(ggplot2)

# Lesson-wide chunk settings (presumably where knitr_fig_path() is defined;
# confirm in ../bin/chunk-options.R), then set the figure prefix for this page
source("../bin/chunk-options.R")
knitr_fig_path("01-")

# Silently load in the data so the rest of the lesson works
gapminder <- read.csv(file = "../data/gapminder_data.csv", header = TRUE)

{: .language-r}

Preview the Gapminder data

# Show the first six rows of the data frame
head(gapminder)

{: .language-r}

<div data-pagedtable="false">
  <script data-pagedtable-source type="application/json">
{"columns":[{"label":[""],"name":["_rn_"],"type":[""],"align":["left"]},{"label":["country"],"name":[1],"type":["chr"],"align":["left"]},{"label":["year"],"name":[2],"type":["int"],"align":["right"]},{"label":["pop"],"name":[3],"type":["dbl"],"align":["right"]},{"label":["continent"],"name":[4],"type":["chr"],"align":["left"]},{"label":["lifeExp"],"name":[5],"type":["dbl"],"align":["right"]},{"label":["gdpPercap"],"name":[6],"type":["dbl"],"align":["right"]}],"data":[{"1":"Afghanistan","2":"1952","3":"8425333","4":"Asia","5":"28.801","6":"779.4453","_rn_":"1"},{"1":"Afghanistan","2":"1957","3":"9240934","4":"Asia","5":"30.332","6":"820.8530","_rn_":"2"},{"1":"Afghanistan","2":"1962","3":"10267083","4":"Asia","5":"31.997","6":"853.1007","_rn_":"3"},{"1":"Afghanistan","2":"1967","3":"11537966","4":"Asia","5":"34.020","6":"836.1971","_rn_":"4"},{"1":"Afghanistan","2":"1972","3":"13079460","4":"Asia","5":"36.088","6":"739.9811","_rn_":"5"},{"1":"Afghanistan","2":"1977","3":"14880372","4":"Asia","5":"38.438","6":"786.1134","_rn_":"6"}],"options":{"columns":{"min":{},"max":[10]},"rows":{"min":[10],"max":[10]},"pages":{}}}
  </script>
</div>

{: .output}

# Show the last six rows of the data frame
tail(gapminder)

{: .language-r}

<div data-pagedtable="false">
  <script data-pagedtable-source type="application/json">
{"columns":[{"label":[""],"name":["_rn_"],"type":[""],"align":["left"]},{"label":["country"],"name":[1],"type":["chr"],"align":["left"]},{"label":["year"],"name":[2],"type":["int"],"align":["right"]},{"label":["pop"],"name":[3],"type":["dbl"],"align":["right"]},{"label":["continent"],"name":[4],"type":["chr"],"align":["left"]},{"label":["lifeExp"],"name":[5],"type":["dbl"],"align":["right"]},{"label":["gdpPercap"],"name":[6],"type":["dbl"],"align":["right"]}],"data":[{"1":"Zimbabwe","2":"1982","3":"7636524","4":"Africa","5":"60.363","6":"788.8550","_rn_":"1699"},{"1":"Zimbabwe","2":"1987","3":"9216418","4":"Africa","5":"62.351","6":"706.1573","_rn_":"1700"},{"1":"Zimbabwe","2":"1992","3":"10704340","4":"Africa","5":"60.377","6":"693.4208","_rn_":"1701"},{"1":"Zimbabwe","2":"1997","3":"11404948","4":"Africa","5":"46.809","6":"792.4500","_rn_":"1702"},{"1":"Zimbabwe","2":"2002","3":"11926563","4":"Africa","5":"39.989","6":"672.0386","_rn_":"1703"},{"1":"Zimbabwe","2":"2007","3":"12311143","4":"Africa","5":"43.487","6":"469.7093","_rn_":"1704"}],"options":{"columns":{"min":{},"max":[10]},"rows":{"min":[10],"max":[10]},"pages":{}}}
  </script>
</div>

{: .output}

# List the column names
names(gapminder)

{: .language-r}

[1] "country"   "year"      "pop"       "continent" "lifeExp"   "gdpPercap"

{: .output}

# Number of columns
ncol(gapminder)

{: .language-r}

[1] 6

{: .output}

# length() of a data frame counts its columns, so this matches ncol()
length(gapminder)

{: .language-r}

[1] 6

{: .output}

# Dimensions as (rows, columns)
dim(gapminder)

{: .language-r}

[1] 1704    6

{: .output}

# Number of rows (observations)
nrow(gapminder)

{: .language-r}

[1] 1704

{: .output}

A statistical overview can be obtained with summary():

# Per-column summary: quartiles and mean for numeric columns,
# length/class/mode for character columns
summary(gapminder)

{: .language-r}

   country               year           pop             continent        
 Length:1704        Min.   :1952   Min.   :6.001e+04   Length:1704       
 Class :character   1st Qu.:1966   1st Qu.:2.794e+06   Class :character  
 Mode  :character   Median :1980   Median :7.024e+06   Mode  :character  
                    Mean   :1980   Mean   :2.960e+07                     
                    3rd Qu.:1993   3rd Qu.:1.959e+07                     
                    Max.   :2007   Max.   :1.319e+09                     
    lifeExp        gdpPercap       
 Min.   :23.60   Min.   :   241.2  
 1st Qu.:48.20   1st Qu.:  1202.1  
 Median :60.71   Median :  3531.8  
 Mean   :59.47   Mean   :  7215.3  
 3rd Qu.:70.85   3rd Qu.:  9325.5  
 Max.   :82.60   Max.   :113523.1  

{: .output}

We can plot life expectancy against year:

# Base-graphics scatter plot of life expectancy (y) against year (x)
plot(lifeExp ~ year, data = gapminder)

{: .language-r}

# Scatter plot of life expectancy against GDP per capita, one point per row
ggplot(gapminder, aes(gdpPercap, lifeExp)) + geom_point()

{: .language-r}

# Life expectancy over time: one line per country, coloured by continent.
# `group` is the documented ggplot2 aesthetic for splitting the data into
# separate series, and `color` (previously misspelled `olor`, which mapped
# nothing to colour) assigns a colour per continent.
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, group = country, color = continent)
) +
  geom_line()

{: .language-r}

# One line per country with the individual observations drawn on top.
# Both layers inherit the colour-by-continent mapping from ggplot().
# `group` (rather than the undocumented `by`) states the per-country
# grouping explicitly.
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, group = country, color = continent)
) +
  geom_line() +
  geom_point()

{: .language-r}

# Colour is mapped inside geom_line() only, so the lines are coloured by
# continent while the points keep the default colour.
# `group` (rather than the undocumented `by`) states the per-country
# grouping explicitly.
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, group = country)
) +
  geom_line(mapping = aes(color = continent)) +
  geom_point()

{: .language-r}

# Life expectancy against GDP per capita on untransformed axes
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) + geom_point()

{: .language-r}

# Scatter plot with semi-transparent points (alpha = 0.5) and a
# log10-scaled x-axis
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10()

{: .language-r}

Figure: scatterplot of GDP per capita vs. life expectancy; the logarithmic x-axis spreads out the data.

# Log-x scatter plot with a linear-model ("lm") trend line added
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm")

{: .language-r}

`geom_smooth()` using formula = 'y ~ x'

{: .output}

# Log-x scatter plot with a thicker linear-model trend line.
# Line thickness is set with `linewidth`; using `size` for lines has been
# deprecated since ggplot2 3.4.0 and triggers a warning.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", linewidth = 1.5)

{: .language-r}

`geom_smooth()` using formula = 'y ~ x'

{: .output}

# Keep only the rows whose continent is "Americas"
americas <- gapminder[gapminder[["continent"]] == "Americas", ]

# One panel per country showing life expectancy over time;
# x-axis tick labels are rotated 45 degrees
ggplot(americas, aes(x = year, y = lifeExp)) +
  geom_line() +
  facet_wrap(~country) +
  theme(axis.text.x = element_text(angle = 45))

{: .language-r}

# Faceted life-expectancy plot for the Americas with readable labels:
# axis titles, a figure title, a legend title, and vertical x-axis tick text
ggplot(americas, aes(x = year, y = lifeExp, color = continent)) +
  geom_line() +
  facet_wrap(~country) +
  labs(
    title = "Figure 1",
    x = "Year",
    y = "Life expectancy",
    color = "Continent"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

{: .language-r}

# Build the labelled, faceted life-expectancy plot and keep it in a variable
# so it can be written to disk.
lifeExp_plot <- ggplot(
  data = americas,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  geom_line() +
  facet_wrap(~country) +
  labs(
    x = "Year",              # x axis title
    y = "Life expectancy",   # y axis title
    title = "Figure 1",      # main title of figure
    color = "Continent"      # title of legend
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Make sure the output folder exists so the save does not fail when
# results/ is missing; this is a no-op if the folder is already there.
dir.create("results", showWarnings = FALSE, recursive = TRUE)

# Write a 12 cm x 10 cm PNG at 300 dpi
ggsave(
  filename = "results/lifeExp.png",
  plot = lifeExp_plot,
  width = 12,
  height = 10,
  dpi = 300,
  units = "cm"
)

{: .language-r}


[1] https://data.oecd.org/gdp/gross-domestic-product-gdp.htm

Sources: